#Load Libraries

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(purrr)
library(tibble)
library(stringr)
library(forcats)
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(reshape)
## 
## Attaching package: 'reshape'
## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
library(network)
## network: Classes for Relational Data
## Version 1.16.0 created on 2019-11-30.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
library(tidygraph)
## 
## Attaching package: 'tidygraph'
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
library(ggraph)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(visNetwork)
library(networkD3)
library(here)
## here() starts at /Users/abigailhorn/Documents/GitHub/PM566
# Directory holding the lab's local input files. The path ends with a slash
# so that paste0() below can append a file name directly.
lab.dir <- here("static/slides/11-interactive-viz/lab/")

#Load data

# Lookup table linking each reported `place` to a city and a Service
# Planning Area (SPA).
mapping <- read.csv(paste0(lab.dir, "Covid research - SPA Mapping.csv"))
#case_data <- read.csv("latimes-place-totals.csv")

# Place-level case counts published by the LA Times. The URL must not carry
# trailing whitespace: a space is not a legal URL character and stricter
# download back-ends reject it.
case_data <- read.csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv")

# Keep only Los Angeles County rows.
case_data <- subset(case_data, county == "Los Angeles")

# Inspect the structure of both inputs before merging them.
# NOTE(review): the captured output below shows Factor columns, i.e. it was
# produced when read.csv() converted strings to factors (the default before
# R 4.0); on newer R these columns would presumably print as chr instead.
str(mapping)
## 'data.frame':    782 obs. of  5 variables:
##  $ City      : Factor w/ 377 levels "","Acton","Adams-Normandie",..: 2 2 5 5 9 9 12 12 36 36 ...
##  $ place     : Factor w/ 711 levels "- Under Investigation",..: 2 545 5 546 9 548 12 659 36 659 ...
##  $ SPA       : Factor w/ 12 levels "","1","2","3",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ SPA.Name  : Factor w/ 11 levels "","Antelope Valley",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Clean.Name: Factor w/ 344 levels "","Acton","Adams-Normandie",..: 2 2 5 5 9 9 12 12 30 30 ...
str(case_data)
## 'data.frame':    65601 obs. of  8 variables:
##  $ date           : Factor w/ 220 levels "2020-03-16","2020-03-17",..: 219 219 219 219 219 219 219 219 219 219 ...
##  $ county         : Factor w/ 49 levels "Alameda","Amador",..: 16 16 16 16 16 16 16 16 16 16 ...
##  $ fips           : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ place          : Factor w/ 1242 levels "90755: Long Beach",..: 193 194 196 197 201 205 207 211 213 214 ...
##  $ confirmed_cases: int  78 292 202 30 1383 394 762 19 2 78 ...
##  $ note           : Factor w/ 9 levels "","1 to 4","1 to 5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ x              : num  -118 -118 -119 -118 -118 ...
##  $ y              : num  34.5 34 34.2 34.5 34.1 ...

#Fix dates

# The dates arrive as "YYYY-MM-DD" text (or factor levels); convert them to
# Date so they can be sorted and differenced.
case_data$date <- as.Date(case_data$date)

#Merge datasets

# Make sure the join key is plain character on both sides.
case_data$place <- as.character(case_data$place)
mapping$place <- as.character(mapping$place)

# Expose the cleaned city name under the name used by the rest of the script.
mapping$City.Name <- mapping$Clean.Name
mapping$Clean.Name <- NULL

# Full outer join on `place`. Use TRUE rather than T, which is an ordinary
# variable that can be reassigned.
# NOTE: `mapping` lists some places more than once (782 rows, 711 distinct
# places), so case rows for those places are repeated in `data`, and
# mapping-only places contribute rows whose date and counts are NA.
data <- merge(case_data, mapping, by = "place", all = TRUE)

str(data)
## 'data.frame':    71739 obs. of  12 variables:
##  $ place          : chr  "- Under Investigation" "Acton" "Acton" "Acton" ...
##  $ date           : Date, format: NA "2020-06-13" ...
##  $ county         : Factor w/ 49 levels "Alameda","Amador",..: NA 16 16 16 16 16 16 16 16 16 ...
##  $ fips           : int  NA 37 37 37 37 37 37 37 37 37 ...
##  $ confirmed_cases: int  NA 16 73 20 8 33 70 45 78 11 ...
##  $ note           : Factor w/ 9 levels "","1 to 4","1 to 5",..: NA 1 1 1 1 1 1 1 1 1 ...
##  $ x              : num  NA -118 -118 -118 -118 ...
##  $ y              : num  NA 34.5 34.5 34.5 34.5 ...
##  $ City           : Factor w/ 377 levels "","Acton","Adams-Normandie",..: 1 2 2 2 2 2 2 2 2 2 ...
##  $ SPA            : Factor w/ 12 levels "","1","2","3",..: 12 2 2 2 2 2 2 2 2 2 ...
##  $ SPA.Name       : Factor w/ 11 levels "","Antelope Valley",..: 10 2 2 2 2 2 2 2 2 2 ...
##  $ City.Name      : Factor w/ 344 levels "","Acton","Adams-Normandie",..: 290 2 2 2 2 2 2 2 2 2 ...

#Create daily table total

# County-wide cumulative cases per day.
# The totals are taken from `case_data`, not from the merged `data`: the
# merge repeats the case rows of every place that appears more than once in
# `mapping` (and adds mapping-only rows with NA dates), so summing `data`
# would inflate the county total.
# na.rm = TRUE keeps one missing place count from turning the whole day's
# total into NA.
summary.LA <- case_data %>%
  group_by(date) %>%
  summarise(confirmed_cases = sum(confirmed_cases, na.rm = TRUE)) %>%
  ungroup()

summary.LA

# Day-over-day change. Diff_day records the gap between consecutive
# observations so that missing days can be spotted; new_cases is the increase
# in the cumulative count since the previous observation.
summary.LA <- summary.LA %>%
  arrange(date) %>%
  mutate(
    Diff_day = date - dplyr::lag(date),
    new_cases = confirmed_cases - dplyr::lag(confirmed_cases)
  )

summary.LA

#Create daily table per City

# One row per (date, City.Name) holding the summed cumulative case count of
# all places mapped to that city; the result is returned ungrouped.
summary.city <- ungroup(
  summarise(
    group_by(data, date, City.Name),
    confirmed_cases = sum(confirmed_cases)
  )
)
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
summary.city

#Add number of new case per city

# Within each city, order the rows by date and difference consecutive
# observations: Diff_day is the gap between observations (in case days are
# missing) and new_cases is the change in the cumulative count.
# The grouping is dropped at the end so later steps do not silently operate
# per city.
summary.city <- summary.city %>%
  group_by(City.Name) %>%
  arrange(date, .by_group = TRUE) %>%
  mutate(
    Diff_day = date - dplyr::lag(date),
    new_cases = confirmed_cases - dplyr::lag(confirmed_cases)
  ) %>%
  ungroup()
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`

## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`

## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`

## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
summary.city
#forcats::fct_explicit_na

# Attach the mapping columns (City, place, SPA, SPA.Name) to the per-city
# table; the inner join keeps only cities present in both tables.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
# NOTE(review): `mapping` holds several rows per City.Name (782 rows, 344
# distinct names), so each (date, City.Name) row is repeated once per mapped
# place. De-duplicate before aggregating this table any further.
summary.city <- merge(summary.city, mapping, by = "City.Name", all = FALSE)
summary.city
#write_csv(summary.city, "summary_city.csv")

#Create daily table per SPA

# Cumulative cases per day and Service Planning Area.
# A place can be listed in several `mapping` rows that belong to the same
# SPA (one row per city it is attached to), in which case the merged `data`
# carries its counts once per mapping row. Keep a single row per case record
# (date, place, coordinates) and SPA before summing so that such places are
# not counted more than once.
summary.SPA <- data %>%
  distinct(date, place, x, y, SPA, SPA.Name, .keep_all = TRUE) %>%
  group_by(date, SPA, SPA.Name) %>%
  summarise(confirmed_cases = sum(confirmed_cases)) %>%
  ungroup()
## Warning: Factor `SPA` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `SPA.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
summary.SPA

# Day-over-day change per SPA. The differences must be taken within each
# SPA: without grouping, the first date of every SPA would be compared with
# the last date of the previous SPA, giving a negative Diff_day and a
# meaningless new_cases value. Rows end up ordered by SPA, then date.
summary.SPA <- summary.SPA %>%
  group_by(SPA, SPA.Name) %>%
  arrange(date, .by_group = TRUE) %>%
  mutate(
    Diff_day = date - dplyr::lag(date),
    new_cases = confirmed_cases - dplyr::lag(confirmed_cases)
  ) %>%
  ungroup()

summary.SPA
#write_csv(summary.SPA, "Summary_SPA.csv")

#Plots

#LA County

# Line chart of the county-wide cumulative case count over time, rendered as
# an interactive plotly widget.
LA.plot <- ggplot(summary.LA, aes(date, confirmed_cases))
LA.plot <- LA.plot + geom_line()

ggplotly(LA.plot)

#By SPA

# One line per Service Planning Area, restricted to SPAs 1 through 8 (other
# SPA values and missing ones are left out), shown as an interactive plot.
SPA.plot <- ggplot(
  subset(summary.SPA, SPA %in% as.character(1:8)),
  aes(date, confirmed_cases, colour = SPA)
)
SPA.plot <- SPA.plot + geom_line()

ggplotly(SPA.plot)

#By City

# Cumulative cases over time for two selected cities, one coloured line per
# city, shown as an interactive plot.
City.plot <- ggplot(
  summary.city[summary.city$City %in% c("Woodland Hills", "Sierra Madre"), ],
  aes(date, confirmed_cases, colour = City)
)
City.plot <- City.plot + geom_line()

ggplotly(City.plot)

# {-}

#write_csv(data, "data.csv")